Aggregated and atomic
scores per method
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
#>
#> Attaching package: 'plotly'
#> The following object is masked from 'package:ggplot2':
#>
#> last_plot
#> The following object is masked from 'package:stats':
#>
#> filter
#> The following object is masked from 'package:graphics':
#>
#> layout
# datasets = read_yaml("datasets.yml")
# print(score_file)
# datasets = read_yaml("datasets.yml")
# datasets = read_yaml(file_dataset)
# Locate the score files to aggregate. `score_files` is a one-element list
# holding a character vector of paths (the loop below reads `score_files[[1]]`).
list_wd <- strsplit(getwd(), "/")[[1]]
if (list_wd[length(list_wd)] == "hadaca3_framework") {
  # Snakemake script: the current working dir is hadaca3_framework, so the
  # scores are read from its output folder.
  score_files <- list(list.files(path = "./output/scores/", full.names = TRUE))
} else {
  # Nextflow script: the score files sit in the current directory.
  # `pattern` is a regular expression, not a glob: the former 'score-li*'
  # matched any name containing "score-l". Anchor the prefix explicitly.
  score_files <- list(list.files(pattern = "^score-li"))
}
# Zero-row results table: 15 character columns describing the pipeline
# configuration, followed by 25 numeric score columns.
pipeline_columns <- c(
  "dataset", "ref",
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA",
  "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET",
  "deconvolution_met",
  "late_integration"
)
score_columns <- c(
  "aid", "aid_norm",
  "aitchison", "aitchison_norm",
  "jsd", "jsd_norm",
  "mae", "mae_norm",
  "pearson_col", "pearson_col_norm",
  "pearson_row", "pearson_row_norm",
  "pearson_tot", "pearson_tot_norm",
  "rmse", "rmse_norm",
  "score_aggreg",
  "sdid", "sdid_norm",
  "spearman_col", "spearman_col_norm",
  "spearman_row", "spearman_row_norm",
  "spearman_tot", "spearman_tot_norm"
)
# Equivalent to calling data.frame() with one named empty vector per column.
results_li <- do.call(
  data.frame,
  c(
    setNames(rep(list(character()), length(pipeline_columns)), pipeline_columns),
    setNames(rep(list(numeric()), length(score_columns)), score_columns)
  )
)
# Read every score file and append one row per file to `results_li`: the 15
# pipeline components parsed from the file name, followed by whatever
# `read_hdf5()` returns for that file.
score_paths <- score_files[[1]]
# Rows are collected in a preallocated list and bound once at the end;
# calling rbind() on the growing data frame at every iteration re-copies all
# previous rows each time.
score_rows <- vector("list", length(score_paths))
for (i in seq_along(score_paths)) {
  score_file <- score_paths[[i]]
  # Extract the base name of the file
  base_name <- basename(score_file)
  # Extract components from the file name. Capture groups, in order:
  # dataset, ref, then (preprocessing, feature selection) for mixRNA, RNA and
  # scRNA, the RNA deconvolution, (preprocessing, feature selection) for
  # mixMET and MET, the MET deconvolution, and the late integration.
  components <- str_match(
    base_name,
    "score-li-(.+)_(.+)_mixRNA_(.+)_(.+)_RNA_(.+)_(.+)_scRNA_(.+)_(.+)_(.+)_mixMET_(.+)_(.+)_MET_(.+)_(.+)_(.+)_(.+).h5"
  )[2:16]
  # str_match() yields NA for every group when the name does not match; such
  # a file would otherwise add an all-NA row that breaks the `!=` filters and
  # the score summaries applied later.
  if (anyNA(components)) {
    warning("Skipping score file with unexpected name: ", base_name,
            call. = FALSE)
    next
  }
  scores <- read_hdf5(score_file)
  score_rows[[i]] <- cbind(
    data.frame(
      dataset = components[1],
      ref = components[2],
      preprocessing_mixRNA = components[3],
      feature_selection_mixRNA = components[4],
      preprocessing_RNA = components[5],
      feature_selection_RNA = components[6],
      preprocessing_scRNA = components[7],
      feature_selection_scRNA = components[8],
      deconvolution_rna = components[9],
      preprocessing_mixMET = components[10],
      feature_selection_mixMET = components[11],
      preprocessing_MET = components[12],
      feature_selection_MET = components[13],
      deconvolution_met = components[14],
      late_integration = components[15],
      stringsAsFactors = FALSE
    ),
    scores
  )
}
# Drop the slots left empty by skipped files, then bind everything at once.
score_rows <- Filter(Negate(is.null), score_rows)
if (length(score_rows) > 0) {
  results_li <- rbind(results_li, do.call(rbind, score_rows))
  rownames(results_li) <- NULL
}
# Drop every row in which one of the pipeline steps carries its "no-op"
# marker ("nopp", "nofs" or "node"). The filters are applied one column after
# the other, in this order.
noop_markers <- c(
  preprocessing_mixRNA = "nopp",
  feature_selection_mixRNA = "nofs",
  preprocessing_RNA = "nopp",
  feature_selection_RNA = "nofs",
  preprocessing_scRNA = "nopp",
  feature_selection_scRNA = "nofs",
  deconvolution_rna = "node",
  preprocessing_mixMET = "nopp",
  feature_selection_mixMET = "nofs",
  preprocessing_MET = "nopp",
  feature_selection_MET = "nofs",
  deconvolution_met = "node"
)
for (step_column in names(noop_markers)) {
  keep_row <- results_li[[step_column]] != noop_markers[[step_column]]
  results_li <- results_li[keep_row, ]
}
# Median aggregated score of each late-integration method, best first.
# (A `filter(dc==2)` step used to sit before the grouping; it is disabled.)
li_median_scores <- summarise(
  group_by(results_li, late_integration),
  GlobalScore = median(score_aggreg)
)
arrange(li_median_scores, desc(GlobalScore))
#> # A tibble: 1 × 2
#> late_integration GlobalScore
#> <chr> <dbl>
#> 1 limeanRMSE 0.660
# Five best pipelines by aggregated score: keep the configuration columns
# (dataset through late_integration) plus score_aggreg, sort by decreasing
# score and take the first five rows.
results_li_top5 <- results_li |>
  select(dataset:late_integration, score_aggreg) |>
  arrange(desc(score_aggreg)) |>
  slice_head(n = 5)
results_li_top5
#> dataset ref preprocessing_mixRNA feature_selection_mixRNA
#> 1 insilicodirichletCopule1 ref LogNorm Toastbulknbfs
#> 2 insilicodirichletCopule1 ref LogNorm Toastbulknbfs
#> 3 insilicodirichletCopule1 ref LogNorm Toastbulknbfs
#> 4 insilicodirichletCopule1 ref LogNorm Toastbulknbfs
#> 5 insilicodirichletCopule1 ref ppID Toastbulknbfs
#> preprocessing_RNA feature_selection_RNA preprocessing_scRNA
#> 1 LogNorm Toastbulknbfs LogNorm
#> 2 LogNorm Toastbulknbfs LogNorm
#> 3 LogNorm Toastbulknbfs ppID
#> 4 LogNorm Toastbulknbfs ppID
#> 5 ppID Toastbulknbfs LogNorm
#> feature_selection_scRNA deconvolution_rna preprocessing_mixMET
#> 1 fsID RLRpoisson normalize
#> 2 Toastbulknbfs RLRpoisson normalize
#> 3 fsID RLRpoisson normalize
#> 4 Toastbulknbfs RLRpoisson normalize
#> 5 fsID RLRpoisson normalize
#> feature_selection_mixMET preprocessing_MET feature_selection_MET
#> 1 mostmethylated normalize mostmethylated
#> 2 mostmethylated normalize mostmethylated
#> 3 mostmethylated normalize mostmethylated
#> 4 mostmethylated normalize mostmethylated
#> 5 mostmethylated normalize mostmethylated
#> deconvolution_met late_integration score_aggreg
#> 1 RLRpoisson limeanRMSE 0.7833919
#> 2 RLRpoisson limeanRMSE 0.7833919
#> 3 RLRpoisson limeanRMSE 0.7833919
#> 4 RLRpoisson limeanRMSE 0.7833919
#> 5 RLRpoisson limeanRMSE 0.7829674
# Build the prediction file name of each top-5 pipeline by joining its first
# 15 columns (the pipeline configuration) with "_", then read every file.
# seq_len() yields no iteration when the table is empty, whereas 1:nrow()
# would iterate over c(1, 0) and build a bogus file name; vapply() guarantees
# a character vector whatever the number of rows.
prediction_file <- vapply(
  seq_len(nrow(results_li_top5)),
  \(i) paste0("pred-li-", paste(results_li_top5[i, 1:15], collapse = "_"), ".h5"),
  character(1)
)
pred <- lapply(prediction_file, \(path) read_hdf5(path))
pred
#> [[1]]
#> [[1]]$pred
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> endo 0.1521071 0.18589690 0.18731846 0.16722981 0.2425523 0.1196690
#> fibro 0.3640816 0.37276015 0.44188177 0.51775550 0.4273091 0.5182906
#> immune 0.1151060 0.01246485 0.03708931 0.03192019 0.0487975 0.0000000
#> classic 0.1709578 0.21615714 0.16256007 0.14330491 0.1430618 0.1761545
#> basal 0.1977475 0.21272097 0.17115039 0.13978959 0.1382793 0.1858859
#> [,7] [,8] [,9] [,10] [,11] [,12]
#> endo 0.1779348 0.1361258 0.11851552 0.17461486 0.14241835 0.17775027
#> fibro 0.3856826 0.4662586 0.29611475 0.53547942 0.37782893 0.33925032
#> immune 0.2011419 0.0216533 0.04088747 0.02629235 0.01569903 0.04642653
#> classic 0.1150575 0.1802985 0.26133309 0.12689810 0.22570489 0.21778814
#> basal 0.1201833 0.1956638 0.28314916 0.13671527 0.23834881 0.21878475
#> [,13] [,14] [,15] [,16] [,17] [,18]
#> endo 0.11505408 0.1490773 0.2230690 0.1352964 0.22097151 0.1786065
#> fibro 0.33983213 0.3719341 0.2379239 0.2745355 0.35362157 0.3042754
#> immune 0.02473837 0.0315475 0.2200350 0.1219688 0.07830546 0.0654633
#> classic 0.24958195 0.2184365 0.1639787 0.2301259 0.16518393 0.2152541
#> basal 0.27079346 0.2290046 0.1549935 0.2380734 0.18191752 0.2364007
#> [,19] [,20] [,21] [,22] [,23] [,24]
#> endo 0.188096385 0.1834232 0.1815480 0.13097155 0.1395175 0.09237389
#> fibro 0.474001230 0.4167431 0.3536357 0.48197661 0.4214840 0.37815392
#> immune 0.002126289 0.1539702 0.1200235 0.09911739 0.0118145 0.12069201
#> classic 0.154967867 0.1222691 0.1685829 0.14071420 0.2007344 0.18853796
#> basal 0.180808229 0.1235943 0.1762099 0.14722026 0.2264496 0.22024222
#> [,25] [,26] [,27] [,28] [,29] [,30]
#> endo 0.21666351 0.1896891 0.15463961 0.174534508 0.1436742 0.1807047
#> fibro 0.43920609 0.3598660 0.43725625 0.326870145 0.4468374 0.3574416
#> immune 0.05857245 0.1486802 0.02255197 0.002461908 0.0402629 0.1473147
#> classic 0.14093980 0.1552845 0.17966206 0.236799769 0.1776290 0.1537884
#> basal 0.14461815 0.1464801 0.20589011 0.259333669 0.1915966 0.1607506
#>
#>
#> [[2]]
#> [[2]]$pred
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> endo 0.1521071 0.18589690 0.18731846 0.16722981 0.2425523 0.1196690
#> fibro 0.3640816 0.37276015 0.44188177 0.51775550 0.4273091 0.5182906
#> immune 0.1151060 0.01246485 0.03708931 0.03192019 0.0487975 0.0000000
#> classic 0.1709578 0.21615714 0.16256007 0.14330491 0.1430618 0.1761545
#> basal 0.1977475 0.21272097 0.17115039 0.13978959 0.1382793 0.1858859
#> [,7] [,8] [,9] [,10] [,11] [,12]
#> endo 0.1779348 0.1361258 0.11851552 0.17461486 0.14241835 0.17775027
#> fibro 0.3856826 0.4662586 0.29611475 0.53547942 0.37782893 0.33925032
#> immune 0.2011419 0.0216533 0.04088747 0.02629235 0.01569903 0.04642653
#> classic 0.1150575 0.1802985 0.26133309 0.12689810 0.22570489 0.21778814
#> basal 0.1201833 0.1956638 0.28314916 0.13671527 0.23834881 0.21878475
#> [,13] [,14] [,15] [,16] [,17] [,18]
#> endo 0.11505408 0.1490773 0.2230690 0.1352964 0.22097151 0.1786065
#> fibro 0.33983213 0.3719341 0.2379239 0.2745355 0.35362157 0.3042754
#> immune 0.02473837 0.0315475 0.2200350 0.1219688 0.07830546 0.0654633
#> classic 0.24958195 0.2184365 0.1639787 0.2301259 0.16518393 0.2152541
#> basal 0.27079346 0.2290046 0.1549935 0.2380734 0.18191752 0.2364007
#> [,19] [,20] [,21] [,22] [,23] [,24]
#> endo 0.188096385 0.1834232 0.1815480 0.13097155 0.1395175 0.09237389
#> fibro 0.474001230 0.4167431 0.3536357 0.48197661 0.4214840 0.37815392
#> immune 0.002126289 0.1539702 0.1200235 0.09911739 0.0118145 0.12069201
#> classic 0.154967867 0.1222691 0.1685829 0.14071420 0.2007344 0.18853796
#> basal 0.180808229 0.1235943 0.1762099 0.14722026 0.2264496 0.22024222
#> [,25] [,26] [,27] [,28] [,29] [,30]
#> endo 0.21666351 0.1896891 0.15463961 0.174534508 0.1436742 0.1807047
#> fibro 0.43920609 0.3598660 0.43725625 0.326870145 0.4468374 0.3574416
#> immune 0.05857245 0.1486802 0.02255197 0.002461908 0.0402629 0.1473147
#> classic 0.14093980 0.1552845 0.17966206 0.236799769 0.1776290 0.1537884
#> basal 0.14461815 0.1464801 0.20589011 0.259333669 0.1915966 0.1607506
#>
#>
#> [[3]]
#> [[3]]$pred
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> endo 0.1521071 0.18589690 0.18731846 0.16722981 0.2425523 0.1196690
#> fibro 0.3640816 0.37276015 0.44188177 0.51775550 0.4273091 0.5182906
#> immune 0.1151060 0.01246485 0.03708931 0.03192019 0.0487975 0.0000000
#> classic 0.1709578 0.21615714 0.16256007 0.14330491 0.1430618 0.1761545
#> basal 0.1977475 0.21272097 0.17115039 0.13978959 0.1382793 0.1858859
#> [,7] [,8] [,9] [,10] [,11] [,12]
#> endo 0.1779348 0.1361258 0.11851552 0.17461486 0.14241835 0.17775027
#> fibro 0.3856826 0.4662586 0.29611475 0.53547942 0.37782893 0.33925032
#> immune 0.2011419 0.0216533 0.04088747 0.02629235 0.01569903 0.04642653
#> classic 0.1150575 0.1802985 0.26133309 0.12689810 0.22570489 0.21778814
#> basal 0.1201833 0.1956638 0.28314916 0.13671527 0.23834881 0.21878475
#> [,13] [,14] [,15] [,16] [,17] [,18]
#> endo 0.11505408 0.1490773 0.2230690 0.1352964 0.22097151 0.1786065
#> fibro 0.33983213 0.3719341 0.2379239 0.2745355 0.35362157 0.3042754
#> immune 0.02473837 0.0315475 0.2200350 0.1219688 0.07830546 0.0654633
#> classic 0.24958195 0.2184365 0.1639787 0.2301259 0.16518393 0.2152541
#> basal 0.27079346 0.2290046 0.1549935 0.2380734 0.18191752 0.2364007
#> [,19] [,20] [,21] [,22] [,23] [,24]
#> endo 0.188096385 0.1834232 0.1815480 0.13097155 0.1395175 0.09237389
#> fibro 0.474001230 0.4167431 0.3536357 0.48197661 0.4214840 0.37815392
#> immune 0.002126289 0.1539702 0.1200235 0.09911739 0.0118145 0.12069201
#> classic 0.154967867 0.1222691 0.1685829 0.14071420 0.2007344 0.18853796
#> basal 0.180808229 0.1235943 0.1762099 0.14722026 0.2264496 0.22024222
#> [,25] [,26] [,27] [,28] [,29] [,30]
#> endo 0.21666351 0.1896891 0.15463961 0.174534508 0.1436742 0.1807047
#> fibro 0.43920609 0.3598660 0.43725625 0.326870145 0.4468374 0.3574416
#> immune 0.05857245 0.1486802 0.02255197 0.002461908 0.0402629 0.1473147
#> classic 0.14093980 0.1552845 0.17966206 0.236799769 0.1776290 0.1537884
#> basal 0.14461815 0.1464801 0.20589011 0.259333669 0.1915966 0.1607506
#>
#>
#> [[4]]
#> [[4]]$pred
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> endo 0.1521071 0.18589690 0.18731846 0.16722981 0.2425523 0.1196690
#> fibro 0.3640816 0.37276015 0.44188177 0.51775550 0.4273091 0.5182906
#> immune 0.1151060 0.01246485 0.03708931 0.03192019 0.0487975 0.0000000
#> classic 0.1709578 0.21615714 0.16256007 0.14330491 0.1430618 0.1761545
#> basal 0.1977475 0.21272097 0.17115039 0.13978959 0.1382793 0.1858859
#> [,7] [,8] [,9] [,10] [,11] [,12]
#> endo 0.1779348 0.1361258 0.11851552 0.17461486 0.14241835 0.17775027
#> fibro 0.3856826 0.4662586 0.29611475 0.53547942 0.37782893 0.33925032
#> immune 0.2011419 0.0216533 0.04088747 0.02629235 0.01569903 0.04642653
#> classic 0.1150575 0.1802985 0.26133309 0.12689810 0.22570489 0.21778814
#> basal 0.1201833 0.1956638 0.28314916 0.13671527 0.23834881 0.21878475
#> [,13] [,14] [,15] [,16] [,17] [,18]
#> endo 0.11505408 0.1490773 0.2230690 0.1352964 0.22097151 0.1786065
#> fibro 0.33983213 0.3719341 0.2379239 0.2745355 0.35362157 0.3042754
#> immune 0.02473837 0.0315475 0.2200350 0.1219688 0.07830546 0.0654633
#> classic 0.24958195 0.2184365 0.1639787 0.2301259 0.16518393 0.2152541
#> basal 0.27079346 0.2290046 0.1549935 0.2380734 0.18191752 0.2364007
#> [,19] [,20] [,21] [,22] [,23] [,24]
#> endo 0.188096385 0.1834232 0.1815480 0.13097155 0.1395175 0.09237389
#> fibro 0.474001230 0.4167431 0.3536357 0.48197661 0.4214840 0.37815392
#> immune 0.002126289 0.1539702 0.1200235 0.09911739 0.0118145 0.12069201
#> classic 0.154967867 0.1222691 0.1685829 0.14071420 0.2007344 0.18853796
#> basal 0.180808229 0.1235943 0.1762099 0.14722026 0.2264496 0.22024222
#> [,25] [,26] [,27] [,28] [,29] [,30]
#> endo 0.21666351 0.1896891 0.15463961 0.174534508 0.1436742 0.1807047
#> fibro 0.43920609 0.3598660 0.43725625 0.326870145 0.4468374 0.3574416
#> immune 0.05857245 0.1486802 0.02255197 0.002461908 0.0402629 0.1473147
#> classic 0.14093980 0.1552845 0.17966206 0.236799769 0.1776290 0.1537884
#> basal 0.14461815 0.1464801 0.20589011 0.259333669 0.1915966 0.1607506
#>
#>
#> [[5]]
#> [[5]]$pred
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> endo 0.1516212 0.18557596 0.18728482 0.16714354 0.2423916 0.1198319
#> fibro 0.3602391 0.36979272 0.44009159 0.51687221 0.4261273 0.5161957
#> immune 0.1184958 0.01480753 0.03770895 0.03198968 0.0490858 0.0000000
#> classic 0.1712810 0.21620232 0.16256378 0.14323721 0.1430556 0.1764022
#> basal 0.1983629 0.21362147 0.17235087 0.14075736 0.1393397 0.1875702
#> [,7] [,8] [,9] [,10] [,11] [,12]
#> endo 0.1772867 0.13612665 0.11771890 0.17435430 0.14237645 0.17762951
#> fibro 0.3822496 0.46380994 0.29453980 0.53200159 0.37584410 0.33700328
#> immune 0.2037650 0.02260563 0.04343975 0.02773796 0.01717599 0.04729253
#> classic 0.1155581 0.18036773 0.26164321 0.12743644 0.22543944 0.21781232
#> basal 0.1211406 0.19709005 0.28265834 0.13846971 0.23916402 0.22026235
#> [,13] [,14] [,15] [,16] [,17] [,18]
#> endo 0.11517104 0.14895442 0.2219482 0.1348663 0.22012318 0.17811381
#> fibro 0.33751651 0.36941897 0.2354276 0.2717808 0.35101142 0.30139336
#> immune 0.02666597 0.03283758 0.2229607 0.1244608 0.08148366 0.06828831
#> classic 0.24929900 0.21853958 0.1640731 0.2304053 0.16512537 0.21526446
#> basal 0.27134747 0.23024944 0.1555904 0.2384868 0.18225637 0.23694006
#> [,19] [,20] [,21] [,22] [,23] [,24]
#> endo 0.18796456 0.1826527 0.1805234 0.1310479 0.13952973 0.09225906
#> fibro 0.47164846 0.4128160 0.3507678 0.4783769 0.41869165 0.37438850
#> immune 0.00329991 0.1569174 0.1227414 0.1014937 0.01298726 0.12359803
#> classic 0.15513086 0.1228646 0.1689692 0.1408919 0.20090673 0.18890329
#> basal 0.18195620 0.1247493 0.1769981 0.1481896 0.22788463 0.22085111
#> [,25] [,26] [,27] [,28] [,29] [,30]
#> endo 0.21603226 0.1892314 0.1546453 0.17423442 0.14302748 0.1806570
#> fibro 0.43664061 0.3572180 0.4342529 0.32451536 0.44308068 0.3546594
#> immune 0.06020253 0.1510027 0.0247478 0.00361847 0.04352868 0.1496000
#> classic 0.14131147 0.1552811 0.1796548 0.23693327 0.17798508 0.1537389
#> basal 0.14581313 0.1472669 0.2066992 0.26069847 0.19237808 0.1613447
# Turn the data-describing columns into factors. unique() keeps the values in
# their order of first appearance, so that is the order of the levels.
all_data_used <- c("dataset", "ref")
for (data_used in all_data_used) {
  column_values <- results_li[[data_used]]
  results_li[[data_used]] <- factor(column_values, levels = unique(column_values))
}
all_functions_li <- c(
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA",
  "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET",
  "deconvolution_met",
  "late_integration"
)
# Turn every pipeline-step column into a factor whose levels are sorted by
# decreasing aggregated score on the 'invitro1' dataset.
#
# The ranking rows are kept as positions in the full table: order() computed
# on the 'invitro1' subset returns positions within that subset, which must
# not be used to index the complete column.
reference_rows <- which(results_li$dataset == "invitro1")
if (length(reference_rows) == 0) {
  # No 'invitro1' result available: rank on all rows instead, otherwise the
  # level set would be empty and every value would become NA.
  reference_rows <- seq_len(nrow(results_li))
}
reference_rows <- reference_rows[
  order(results_li$score_aggreg[reference_rows], decreasing = TRUE)
]
for (fun in all_functions_li) {
  fun_values <- as.character(results_li[[fun]])
  # Ranked values first; values never seen in the ranking rows are appended
  # (in order of appearance) so that they are not turned into NA.
  results_li[[fun]] <- factor(
    fun_values,
    levels = union(fun_values[reference_rows], fun_values)
  )
}
# Interactive table of the pipeline-step columns plus the aggregated score.
index_aggreg <- which(names(results_li) == "score_aggreg")
# One position per entry of all_functions_li, shifted by 2 so that the
# selection starts at the third column of results_li.
function_columns <- seq_along(all_functions_li) + 2
# Button toggling the visibility of every column except the first one.
colvis_button <- list(
  extend = "colvis",
  text = "Show/Hide Columns",
  columns = ":not(:first-child)"
)
table_options <- list(
  pageLength = 10,
  dom = "Bfrtip", # the leading "B" places the Buttons extension in the layout
  buttons = list(colvis_button)
)
datatable(
  results_li[, c(function_columns, index_aggreg)],
  extensions = "Buttons",
  options = table_options
)